#Import necessary libraries
import json
from IPython.display import Image
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
%load_ext autoreload
%autoreload 2
%matplotlib inline
This project focuses on analyzing the patent information retrieved from the patentsview API. The PatentsView API is intended to inspire the exploration and enhanced understanding of US intellectual property (IP) and innovation systems.
The base endpoint url for the patent API is given as follows.
This API version does not require any API key and has the following query parameters embedded to the url.
The max size of results per page is limited to 10000 in this API version.
Steps followed in the data retrieval process are mentioned as follows.
#List of response fields
response_fields_list = [ "patent_firstnamed_assignee_city", "patent_firstnamed_assignee_country",
"patent_firstnamed_assignee_id", "patent_firstnamed_assignee_latitude",
"patent_firstnamed_assignee_longitude","patent_firstnamed_assignee_state",
"patent_firstnamed_inventor_city", "patent_firstnamed_inventor_country",
"patent_firstnamed_inventor_id", "patent_firstnamed_inventor_latitude",
"patent_firstnamed_inventor_longitude", "patent_firstnamed_inventor_state",
"patent_num_cited_by_us_patents", "patent_number", "patent_title",
"patent_type", "patent_year", "patent_date"]
#Filters for the results
patent_inv_country = "US"
patent_assignee_country = "US"
# Form the final url
base_url = 'https://api.patentsview.org/patents/query?q={"_gte":{'
parameter_1 = f'"patent_firstnamed_inventor_country":"{patent_inv_country}"'
parameter_2 = f'"patent_firstnamed_assignee_country":"{patent_assignee_country}"'
response_fields = '}}&f=['
url = base_url + parameter_1 + "," + parameter_2 + response_fields
final_url = url
for i in range(0, len(response_fields_list)):
final_url = final_url + '"' + response_fields_list[i] + '",'
#print(final_url)
final_url = final_url[:-1:]
final_url = final_url + ']' + '&o={"per_page": 10000} &s=[{"patent_number":"asc"}]'
print(final_url)
payload={}
response = requests.request("GET", final_url, data=payload)
#print(response.text)
https://api.patentsview.org/patents/query?q={"_gte":{"patent_firstnamed_inventor_country":"US","patent_firstnamed_assignee_country":"US"}}&f=["patent_firstnamed_assignee_city","patent_firstnamed_assignee_country","patent_firstnamed_assignee_id","patent_firstnamed_assignee_latitude","patent_firstnamed_assignee_longitude","patent_firstnamed_assignee_state","patent_firstnamed_inventor_city","patent_firstnamed_inventor_country","patent_firstnamed_inventor_id","patent_firstnamed_inventor_latitude","patent_firstnamed_inventor_longitude","patent_firstnamed_inventor_state","patent_num_cited_by_us_patents","patent_number","patent_title","patent_type","patent_year","patent_date"]&o={"per_page": 10000} &s=[{"patent_number":"asc"}]
#Write the response to patent_details.json
with open('output/patent_details.json', 'w') as fout:
fout.write(response.text)
Reading the json file back in.
patent_details_text = open('output/patent_details.json').read()
patent_details = json.loads(patent_details_text)
print(type(patent_details))
<class 'dict'>
Store the patent details to a dataframe
patent_details_df = pd.DataFrame(patent_details['patents'])
#Convert patent_date to date and extract patent month from patent date
patent_details_df['patent_date'] = pd.to_datetime(patent_details_df['patent_date'], format="%Y/%m/%d")
patent_details_df['patent_month'] = pd.DatetimeIndex(patent_details_df['patent_date']).month
print(patent_details_df.info())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10000 entries, 0 to 9999 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 patent_firstnamed_assignee_city 9049 non-null object 1 patent_firstnamed_assignee_country 9106 non-null object 2 patent_firstnamed_assignee_id 9125 non-null object 3 patent_firstnamed_assignee_latitude 9063 non-null object 4 patent_firstnamed_assignee_longitude 9063 non-null object 5 patent_firstnamed_assignee_state 8391 non-null object 6 patent_firstnamed_inventor_city 10000 non-null object 7 patent_firstnamed_inventor_country 10000 non-null object 8 patent_firstnamed_inventor_id 10000 non-null object 9 patent_firstnamed_inventor_latitude 9936 non-null object 10 patent_firstnamed_inventor_longitude 9936 non-null object 11 patent_firstnamed_inventor_state 9983 non-null object 12 patent_num_cited_by_us_patents 10000 non-null object 13 patent_number 10000 non-null object 14 patent_title 10000 non-null object 15 patent_type 10000 non-null object 16 patent_year 10000 non-null object 17 patent_date 10000 non-null datetime64[ns] 18 patent_month 10000 non-null int64 dtypes: datetime64[ns](1), int64(1), object(17) memory usage: 1.4+ MB None
patent_details_df.head(5)
| patent_firstnamed_assignee_city | patent_firstnamed_assignee_country | patent_firstnamed_assignee_id | patent_firstnamed_assignee_latitude | patent_firstnamed_assignee_longitude | patent_firstnamed_assignee_state | patent_firstnamed_inventor_city | patent_firstnamed_inventor_country | patent_firstnamed_inventor_id | patent_firstnamed_inventor_latitude | patent_firstnamed_inventor_longitude | patent_firstnamed_inventor_state | patent_num_cited_by_us_patents | patent_number | patent_title | patent_type | patent_year | patent_date | patent_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Waltham | US | e915981d-a0eb-4a13-a817-bc4597d35b98 | 42.3756 | -71.2358 | MA | Manhattan Beach | US | fl:jo_ln:marron-5 | 33.8842 | -118.426 | CA | 4 | 10000000 | Coherent LADAR using intra-pixel quadrature de... | utility | 2018 | 2018-06-19 | 6 |
| 1 | Brookfield | US | 0bf96d08-fe99-4bbb-ae0d-52bc76f5b051 | 43.0607 | -88.1263 | WI | Brookfield | US | fl:co_ln:dickert-1 | 43.0607 | -88.1263 | WI | 4 | 10000007 | PEX expanding tool | utility | 2018 | 2018-06-19 | 6 |
| 2 | Northvale | US | dc1b4faf-5315-40df-af2b-e688832e5286 | 41.0086 | -73.9482 | NJ | Closter | US | fl:li_ln:caspi-2 | 40.9709 | -73.969 | NJ | 0 | 10000008 | Bracelet mold and method of use | utility | 2018 | 2018-06-19 | 6 |
| 3 | None | None | None | None | None | None | Hayward | US | fl:na_ln:maier-3 | 37.6688 | -122.081 | CA | 1 | 10000009 | Sterile environment for additive manufacturing | utility | 2018 | 2018-06-19 | 6 |
| 4 | Norwalk | US | 903324e5-5bbb-4187-8ee9-4f3291165199 | 41.0958 | -73.4205 | CT | Rochester | US | fl:ro_ln:irizarry-1 | 43.1855 | -77.6107 | NY | 0 | 10000010 | 3-D electrostatic printer using rack and pinio... | utility | 2018 | 2018-06-19 | 6 |
#Check for NaN values
patent_details_df.isna().any()
patent_firstnamed_assignee_city True patent_firstnamed_assignee_country True patent_firstnamed_assignee_id True patent_firstnamed_assignee_latitude True patent_firstnamed_assignee_longitude True patent_firstnamed_assignee_state True patent_firstnamed_inventor_city False patent_firstnamed_inventor_country False patent_firstnamed_inventor_id False patent_firstnamed_inventor_latitude True patent_firstnamed_inventor_longitude True patent_firstnamed_inventor_state True patent_num_cited_by_us_patents False patent_number False patent_title False patent_type False patent_year False patent_date False patent_month False dtype: bool
#Drop rows with NaN values
patent_details_df.dropna(inplace = True)
patent_details_df.head(5)
| patent_firstnamed_assignee_city | patent_firstnamed_assignee_country | patent_firstnamed_assignee_id | patent_firstnamed_assignee_latitude | patent_firstnamed_assignee_longitude | patent_firstnamed_assignee_state | patent_firstnamed_inventor_city | patent_firstnamed_inventor_country | patent_firstnamed_inventor_id | patent_firstnamed_inventor_latitude | patent_firstnamed_inventor_longitude | patent_firstnamed_inventor_state | patent_num_cited_by_us_patents | patent_number | patent_title | patent_type | patent_year | patent_date | patent_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Waltham | US | e915981d-a0eb-4a13-a817-bc4597d35b98 | 42.3756 | -71.2358 | MA | Manhattan Beach | US | fl:jo_ln:marron-5 | 33.8842 | -118.426 | CA | 4 | 10000000 | Coherent LADAR using intra-pixel quadrature de... | utility | 2018 | 2018-06-19 | 6 |
| 1 | Brookfield | US | 0bf96d08-fe99-4bbb-ae0d-52bc76f5b051 | 43.0607 | -88.1263 | WI | Brookfield | US | fl:co_ln:dickert-1 | 43.0607 | -88.1263 | WI | 4 | 10000007 | PEX expanding tool | utility | 2018 | 2018-06-19 | 6 |
| 2 | Northvale | US | dc1b4faf-5315-40df-af2b-e688832e5286 | 41.0086 | -73.9482 | NJ | Closter | US | fl:li_ln:caspi-2 | 40.9709 | -73.969 | NJ | 0 | 10000008 | Bracelet mold and method of use | utility | 2018 | 2018-06-19 | 6 |
| 4 | Norwalk | US | 903324e5-5bbb-4187-8ee9-4f3291165199 | 41.0958 | -73.4205 | CT | Rochester | US | fl:ro_ln:irizarry-1 | 43.1855 | -77.6107 | NY | 0 | 10000010 | 3-D electrostatic printer using rack and pinio... | utility | 2018 | 2018-06-19 | 6 |
| 5 | Watertown | US | 94fabadd-7a5a-43dd-8afe-2e21db4d7481 | 42.372 | -71.1754 | MA | Cambridge | US | fl:gr_ln:mark-2 | 42.3784 | -71.1316 | MA | 0 | 10000011 | Supports for sintering additively manufactured... | utility | 2018 | 2018-06-19 | 6 |
Quick groupby to compare patent inventors by state
compare_inv_by_state = patent_details_df.groupby('patent_firstnamed_inventor_state')['patent_firstnamed_inventor_id'].count().sort_values(ascending=False)
Quick groupby to compare patent assignees by state
compare_ass_by_state = patent_details_df.groupby('patent_firstnamed_assignee_state')['patent_firstnamed_assignee_id'].count().sort_values(ascending=False)
#Convert the comparison results to data frame
compare_inv_by_state_df = compare_inv_by_state.to_frame()
compare_ass_by_state_df = compare_ass_by_state.to_frame()
#Drop index and rename columns
compare_inv_by_state_df.reset_index(inplace = True)
compare_inv_by_state_df.rename(columns={"patent_firstnamed_inventor_state": "state", "patent_firstnamed_inventor_id": "no_of_inventors"}, inplace = True)
compare_inv_by_state_df
| state | no_of_inventors | |
|---|---|---|
| 0 | CA | 2316 |
| 1 | TX | 601 |
| 2 | NY | 497 |
| 3 | MA | 439 |
| 4 | WA | 420 |
| 5 | MI | 407 |
| 6 | IL | 252 |
| 7 | NJ | 245 |
| 8 | MN | 234 |
| 9 | OH | 218 |
| 10 | FL | 211 |
| 11 | PA | 209 |
| 12 | NC | 184 |
| 13 | OR | 172 |
| 14 | CO | 170 |
| 15 | CT | 154 |
| 16 | GA | 142 |
| 17 | WI | 139 |
| 18 | AZ | 136 |
| 19 | VA | 125 |
| 20 | IN | 123 |
| 21 | MD | 109 |
| 22 | UT | 88 |
| 23 | MO | 73 |
| 24 | SC | 62 |
| 25 | NH | 61 |
| 26 | ID | 53 |
| 27 | TN | 52 |
| 28 | KY | 47 |
| 29 | IA | 44 |
| 30 | KS | 41 |
| 31 | OK | 40 |
| 32 | AL | 32 |
| 33 | NV | 23 |
| 34 | NM | 23 |
| 35 | LA | 21 |
| 36 | RI | 20 |
| 37 | VT | 17 |
| 38 | NE | 13 |
| 39 | AR | 13 |
| 40 | ME | 13 |
| 41 | DE | 12 |
| 42 | DC | 10 |
| 43 | WV | 7 |
| 44 | WY | 7 |
| 45 | SD | 6 |
| 46 | ND | 6 |
| 47 | MS | 6 |
| 48 | MT | 6 |
| 49 | HI | 5 |
| 50 | PR | 3 |
#Drop index and rename columns
compare_ass_by_state_df.reset_index(inplace = True)
compare_ass_by_state_df.rename(columns={"patent_firstnamed_assignee_state": "state", "patent_firstnamed_assignee_id": "no_of_assignees"}, inplace = True)
compare_ass_by_state_df
| state | no_of_assignees | |
|---|---|---|
| 0 | CA | 2326 |
| 1 | NY | 751 |
| 2 | TX | 663 |
| 3 | MA | 427 |
| 4 | MI | 417 |
| 5 | WA | 343 |
| 6 | IL | 335 |
| 7 | NJ | 231 |
| 8 | NC | 224 |
| 9 | CT | 212 |
| 10 | GA | 207 |
| 11 | MN | 192 |
| 12 | OH | 182 |
| 13 | PA | 160 |
| 14 | FL | 154 |
| 15 | CO | 126 |
| 16 | VA | 105 |
| 17 | IN | 101 |
| 18 | WI | 95 |
| 19 | AZ | 94 |
| 20 | MD | 89 |
| 21 | DE | 83 |
| 22 | MO | 68 |
| 23 | NV | 65 |
| 24 | DC | 64 |
| 25 | UT | 62 |
| 26 | TN | 60 |
| 27 | ID | 53 |
| 28 | OR | 53 |
| 29 | KS | 47 |
| 30 | NH | 37 |
| 31 | IA | 37 |
| 32 | SC | 35 |
| 33 | PR | 30 |
| 34 | KY | 23 |
| 35 | OK | 22 |
| 36 | NM | 22 |
| 37 | RI | 18 |
| 38 | AL | 18 |
| 39 | LA | 18 |
| 40 | AR | 16 |
| 41 | NE | 11 |
| 42 | VT | 6 |
| 43 | SD | 6 |
| 44 | WV | 4 |
| 45 | MT | 4 |
| 46 | HI | 4 |
| 47 | ND | 2 |
| 48 | ME | 2 |
| 49 | WY | 2 |
| 50 | MS | 1 |
compare_by_state_df = compare_inv_by_state_df.merge(compare_ass_by_state_df, how='inner', on='state')
compare_by_state_df
| state | no_of_inventors | no_of_assignees | |
|---|---|---|---|
| 0 | CA | 2316 | 2326 |
| 1 | TX | 601 | 663 |
| 2 | NY | 497 | 751 |
| 3 | MA | 439 | 427 |
| 4 | WA | 420 | 343 |
| 5 | MI | 407 | 417 |
| 6 | IL | 252 | 335 |
| 7 | NJ | 245 | 231 |
| 8 | MN | 234 | 192 |
| 9 | OH | 218 | 182 |
| 10 | FL | 211 | 154 |
| 11 | PA | 209 | 160 |
| 12 | NC | 184 | 224 |
| 13 | OR | 172 | 53 |
| 14 | CO | 170 | 126 |
| 15 | CT | 154 | 212 |
| 16 | GA | 142 | 207 |
| 17 | WI | 139 | 95 |
| 18 | AZ | 136 | 94 |
| 19 | VA | 125 | 105 |
| 20 | IN | 123 | 101 |
| 21 | MD | 109 | 89 |
| 22 | UT | 88 | 62 |
| 23 | MO | 73 | 68 |
| 24 | SC | 62 | 35 |
| 25 | NH | 61 | 37 |
| 26 | ID | 53 | 53 |
| 27 | TN | 52 | 60 |
| 28 | KY | 47 | 23 |
| 29 | IA | 44 | 37 |
| 30 | KS | 41 | 47 |
| 31 | OK | 40 | 22 |
| 32 | AL | 32 | 18 |
| 33 | NV | 23 | 65 |
| 34 | NM | 23 | 22 |
| 35 | LA | 21 | 18 |
| 36 | RI | 20 | 18 |
| 37 | VT | 17 | 6 |
| 38 | NE | 13 | 11 |
| 39 | AR | 13 | 16 |
| 40 | ME | 13 | 2 |
| 41 | DE | 12 | 83 |
| 42 | DC | 10 | 64 |
| 43 | WV | 7 | 4 |
| 44 | WY | 7 | 2 |
| 45 | SD | 6 | 6 |
| 46 | ND | 6 | 2 |
| 47 | MS | 6 | 1 |
| 48 | MT | 6 | 4 |
| 49 | HI | 5 | 4 |
| 50 | PR | 3 | 30 |
From the above comparison, it is clear that the state of California has the highest number of patent inventors and patent assignees in this dataset.
The patent information is further explored with different visualization packages like ipywidgets, plotly, Bokeh and Streamlit.
#Get the list of columns present in the patent_details_df dataframe
patent_data_cols = patent_details_df.columns.to_list()
patent_data_cols
['patent_firstnamed_assignee_city', 'patent_firstnamed_assignee_country', 'patent_firstnamed_assignee_id', 'patent_firstnamed_assignee_latitude', 'patent_firstnamed_assignee_longitude', 'patent_firstnamed_assignee_state', 'patent_firstnamed_inventor_city', 'patent_firstnamed_inventor_country', 'patent_firstnamed_inventor_id', 'patent_firstnamed_inventor_latitude', 'patent_firstnamed_inventor_longitude', 'patent_firstnamed_inventor_state', 'patent_num_cited_by_us_patents', 'patent_number', 'patent_title', 'patent_type', 'patent_year', 'patent_date', 'patent_month']
Create the widget for the columns present in the dataframe. The options=data_cols will populate the dropdown options with our column names.
pick_col = widgets.Dropdown(
options=patent_data_cols,
description='Column',layout={'width': 'max-content'}
)
display(pick_col)
Dropdown(description='Column', layout=Layout(width='max-content'), options=('patent_firstnamed_assignee_city',…
patent_details_df['patent_firstnamed_inventor_city'].value_counts().size
2003
Create a wrapper function that takes a data column from the dataframe and creates a countplot using seaborn.
def create_countplot(col):
ax = sns.countplot(y=patent_details_df[col], data=patent_details_df,
order=patent_details_df[col].value_counts().iloc[:10].index)
ax.set(xlabel='Number of patents');
Using interact function create user interface (UI) controls for exploring the patent data
widgets.interact(create_countplot, col=pick_col);
interactive(children=(Dropdown(description='Column', layout=Layout(width='max-content'), options=('patent_firs…
Create a wrapper function that takes number of categories to be displayed as input argument and creates a countplot of top inventor states using seaborn.
def countplot_top_inv_states(n):
ax = sns.countplot(y=patent_details_df['patent_firstnamed_inventor_state'], data=patent_details_df,
order=patent_details_df['patent_firstnamed_inventor_state'].value_counts(ascending=False).iloc[:n].index)
ax.set(xlabel='Number of patents')
ax.set(ylabel='State');
style = {'description_width': 'initial'}
pick_n = widgets.IntSlider(
value=1,
min=2,
max=15,
step=1,
style=style,
description='Number of categories'
)
widgets.interact(countplot_top_inv_states, n=pick_n);
interactive(children=(IntSlider(value=2, description='Number of categories', max=15, min=2, style=SliderStyle(…
Scatter plot of number of inventors and no of assignees by state using plotly express
fig = px.scatter(compare_by_state_df, x="no_of_inventors", y="no_of_assignees", color="state")
fig.show()
Exploring the locations of patent inventors using scatter_geo map. The latitude and longitude values of the inventor are used in the plot.
fig = px.scatter_geo(patent_details_df,lat=patent_details_df['patent_firstnamed_inventor_latitude'],
lon=patent_details_df['patent_firstnamed_inventor_longitude'],
hover_name=patent_details_df['patent_firstnamed_inventor_city'],
hover_data=["patent_firstnamed_inventor_id", "patent_firstnamed_inventor_state"])
fig.update_layout(title = 'Exploring the locations of patent inventors - 2018', title_x=0.5)
fig.show()
Exploring the locations of patent assignees using scatter_geo map. The latitude and longitude values of the assignees are used in the plot.
fig = px.scatter_geo(patent_details_df,lat=patent_details_df['patent_firstnamed_assignee_latitude'],
lon=patent_details_df['patent_firstnamed_assignee_longitude'], hover_name=patent_details_df['patent_firstnamed_assignee_city'],
hover_data=["patent_firstnamed_assignee_id", "patent_firstnamed_assignee_state"])
fig.update_layout(title = 'Exploring the locations of patent assignees - 2018', title_x=0.5)
fig.show()
From the geo map we can see higher presence of inventors and assignees in the mid west and north eastern part of the United States.
Create a bar plot to visualize the top inventor cities with most number of patents in the dataset.
Using groupby the number of patents by inventor city is retrieved and the results are stored in cat_inventor_city_df dataframe
cat_inventor_city = patent_details_df.groupby(['patent_firstnamed_inventor_city'])['patent_number'].count().sort_values(ascending=False).head(10)
cat_inventor_city_df = cat_inventor_city.to_frame()
cat_inventor_city_df.reset_index(inplace = True)
cat_inventor_city_df.rename(columns={"patent_firstnamed_inventor_city": "inventor_city", "patent_number": "no_of_patents"}, inplace = True)
cat_inventor_city_df.set_index(keys = 'inventor_city', inplace = True)
cat_inventor_city_df
| no_of_patents | |
|---|---|
| inventor_city | |
| San Jose | 270 |
| San Francisco | 212 |
| San Diego | 188 |
| Seattle | 125 |
| Austin | 125 |
| Sunnyvale | 99 |
| Houston | 94 |
| Palo Alto | 92 |
| Cupertino | 88 |
| Mountain View | 84 |
import pandas_bokeh
pandas_bokeh.output_notebook()
pd.set_option('plotting.backend', 'pandas_bokeh')
# Create Bokeh-Table with DataFrame:
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models import ColumnDataSource
cat_inventor_city_df.plot_bokeh(kind="bar",title ="Top 10 Cities with most patents",figsize =(900,400),xlabel = "inventor city",ylabel="no of patents");
From the above bar chart, we can see that the city San Jose has most number of patents followed by San Francisco and San Diego.
Bar charts for visualizing the top assignee cities and top inventor cities are created using Streamlit.
Refer patent_barchart.py for addtional details.
Using groupby the number of patents by assignee city is retrieved and the results are stored in cat_assignee_city_df dataframe
cat_assignee_city = patent_details_df.groupby(['patent_firstnamed_assignee_city'])['patent_number'].count().sort_values(ascending=False).head(10)
cat_assignee_city_df = cat_assignee_city.to_frame()
cat_assignee_city_df.reset_index(inplace = True)
cat_assignee_city_df.rename(columns={"patent_firstnamed_assignee_city": "assignee_city", "patent_number": "no_of_patents"}, inplace = True)
cat_assignee_city_df.set_index(keys = 'assignee_city', inplace = True)
st.title('Patent Dashboard')
st.markdown('The dashboard will visualize the patent data retrieved from the patent API')
st.markdown('## **Patent data**')
st.dataframe(patent_details_df)
st.markdown('## **Patents by inventor city**')
st.bar_chart(cat_inventor_city_df)
st.markdown('## **Patents by assignee city**')
st.bar_chart(cat_assignee_city_df)
2022-06-24 20:55:14.297
Warning: to view this Streamlit app on a browser, run it with the following
command:
streamlit run C:\Users\mbala\anaconda3\envs\aap\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)
# Create a Group by object using patent_date
grp_date = patent_details_df.groupby('patent_date')
# Compute number of patents by date and check out the result
patents_by_date = pd.DataFrame(grp_date.size(), columns=['no_of_patents'])
patents_by_date
| no_of_patents | |
|---|---|
| patent_date | |
| 2018-06-19 | 1498 |
| 2018-06-26 | 2210 |
| 2018-07-03 | 2322 |
| 2018-07-10 | 2268 |
| 2018-07-17 | 9 |
mean_patents = patents_by_date['no_of_patents'].mean()
tot_patent_dates = patents_by_date['no_of_patents'].count()
num_days = (patents_by_date.index.max() - patents_by_date.index.min()).days + 1
print("The mean number of patents per day is {:.2f}. The mean is based on {} patent dates."
.format(mean_patents, tot_patent_dates))
print("The beginning of the date range is {}.".format(patents_by_date.index.min()))
print("The end of the date range is {}.".format(patents_by_date.index.max()))
print("The are {} days in the date range.".format(num_days))
The mean number of patents per day is 1661.40. The mean is based on 5 patent dates. The beginning of the date range is 2018-06-19 00:00:00. The end of the date range is 2018-07-17 00:00:00. The are 29 days in the date range.
Plotting number of patents by patent_date
patent_details_df_2 = patent_details_df.copy()
patents_details_by_date = patent_details_df_2.groupby('patent_date')['patent_number'].count().sort_values(ascending=False)
pat_det_by_date_df = patents_details_by_date.to_frame()
pat_det_by_date_df.reset_index(inplace = True)
pat_det_by_date_df.rename(columns={"patent_date": "date", "patent_number": "no_of_patents"}, inplace = True)
pat_det_by_date_df
pat_det_by_date_df.set_index('date', inplace = True)
pat_det_by_date_df['no_of_patents'].plot();
From the above plot, we can see a spike in the number of patents in the first two weeks of July. Since the API version returns only 10000 patents, the year is limited to 2018 data only.